Three main aspects to portray urban decay:
# import required libraries
%matplotlib inline
import os
import fiona
# pretty printing - makes some kinds of text output easier to read
import pprint
import IPython
from matplotlib import pyplot as plt
import pandas as pd
import geopandas as gpd
from pandas import Series
from geopandas import GeoSeries, GeoDataFrame, read_file, gpd
import contextily as ctx
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
The primary goal of this study is to detect the differences between the period before the Strategic Neighborhood Fund program in 2014 and after the program in 2022. Five variables for each year resulting ten datasets for variables. To avoid repetition some functions are created.
# create function to read multiple files
def read_files(filenames, names):
    """Read multiple spatial/tabular files with geopandas.

    Parameters
    ----------
    filenames : list of str
        Paths of the files to read.
    names : list of str
        Output names, paired positionally with ``filenames``.

    Returns
    -------
    list of (name, GeoDataFrame) tuples. Files that fail to load are
    reported and skipped instead of aborting the whole batch.
    """
    dfs = []
    for filename, name in zip(filenames, names):
        try:
            df = gpd.read_file(filename)
            dfs.append((name, df))
        except FileNotFoundError:
            # Bug fix: the old message printed a literal placeholder
            # instead of the offending path.
            print(f"Error: File '{filename}' not found.")
        except Exception as e:
            print(f"Error: {e}")
    return dfs
# list files to be read and their output names
# NOTE(review): names pair positionally with filenames — the hash-named
# shapefile is the 2014 vacancy survey and Parcels.shp supplies the 2023
# building data (hence the 'vacant2023' name).
filenames = ['data_raw/18f98a29-473c-4598-a95f-90a6aa3a6e952020330-1-20bvlw.3fg7n.shp',
             'data_raw/Parcels.shp',
             'data_raw/pop_detroit_2014.csv',
             'data_raw/pop_detroit_2022.csv',
             'data_raw/race_detroit_2014.csv',
             'data_raw/race_detroit_2022.csv',
             'data_raw/income_detroit_2014.csv',
             'data_raw/income_detroit_2022.csv',
             'data_raw/employment_detroit_2014.csv',
             'data_raw/employment_detroit_2022.csv']
names = ['vacant2014','vacant2023','pop2014','pop2022',
         'race2014','race2022','income2014','income2022','employ2014','employ2022']
dataframes = read_files(filenames, names)
# Convert each tuple (name, DataFrame) from result into separate variables (individual DataFrames)
# NOTE: writing into globals() is a notebook convenience; prefer a dict in library code.
for name, df in dataframes:
    globals()[name] = df
# check files and columns
vacant2014.columns
# filter necessary columns
# Pct_VeryLi is the percentage of very-likely-vacant buildings
# (very likely vacant buildings / total buildings) at the census-tract level
vacant2014 = vacant2014[['GEOID10','Pct_VeryLi','geometry']]
# check columns
vacant2023.columns
# filter necessary columns
# property_1 provides the building type and condition
vacant2023 = vacant2023[['property_1','geometry']]
# decided to use the projection EPSG:2898 and ensure all data match it
# change the projection
vacant2023 = vacant2023.to_crs("EPSG:2898")
# all data mostly already clean except the vacant-building data
# The bare `.columns` expressions below are notebook-style inspections: each
# one displays the column names of the corresponding raw table.
# check columns
pop2014.columns
# check columns
pop2022.columns
# check columns
race2014.columns
# check columns
race2022.columns
# check columns
income2014.columns
# check columns
income2022.columns
# check columns
employ2014.columns
# check columns
employ2022.columns
All datasets (except building vacancy) originally have no geometry, but because we use geopandas to read the data, a geometry column is automatically added with no values. All datasets are also still of object type. Therefore, we need to add the real geometry data and change the types to integer or float. The geometry information can be found in the census tract data. Since the census tracts in 2014 and 2022 differ, we use both census tract datasets.
# load census tract 2014 data of Detroit city (source of 2014 tract geometry)
tract_detroit2014 = gpd.read_file('data_raw/tract2014.shp')
# check and change the projection to the common working CRS (EPSG:2898)
tract_detroit2014 = tract_detroit2014.to_crs("EPSG:2898")
# create function to drop geometry and merge with census tract data 2014 shapefile
def filtermerge(variables, outputs):
    """Drop the empty geometry column from each table and merge it with the
    2014 census-tract shapefile (module-level ``tract_detroit2014``) on GEOID.

    Parameters
    ----------
    variables : list of DataFrame
        Tables carrying a ``GEOID`` column and a placeholder ``geometry`` column.
    outputs : list of str
        Output names, paired positionally with ``variables``.

    Returns
    -------
    list of (name, GeoDataFrame) tuples; failed merges are reported and skipped.
    """
    dfs = []
    for variable, output in zip(variables, outputs):
        try:
            # Bug fix: drop without inplace=True so the caller's frame
            # is no longer mutated as a side effect.
            variable = variable.drop(columns=['geometry'])
            df = tract_detroit2014.merge(variable, on='GEOID')
            dfs.append((output, df))
        except Exception as e:
            # Bug fix: report which dataset failed. The old code caught
            # FileNotFoundError (which merge() never raises) and interpolated
            # the entire input list into the message.
            print(f"Error merging '{output}': {e}")
    return dfs
# list the 2014 tables to be merged and their output names
variables = [pop2014, race2014,
             income2014, employ2014]
outputs = ['pop2014_','race2014_',
           'income2014_','employ2014_']
dataframes2014 = filtermerge(variables, outputs)
# Convert each tuple (name, DataFrame) from result into separate variables (individual DataFrames)
# NOTE: writing into globals() is a notebook convenience; prefer a dict in library code.
for output, df in dataframes2014:
    globals()[output] = df
# check the result
pop2014_.head(2)
# load census tract data 2020 of Detroit city (geometry for the 2022 tables)
tract_detroit2020 = gpd.read_file('data_raw/Detroit_2020_Tracts_Joined.shp')
# to join with other data on GEOID, rename the FIPS column to GEOID
tract_detroit2020.rename(columns={'FIPS': 'GEOID'}, inplace=True)
# change the projection to the common working CRS
tract_detroit2020 = tract_detroit2020.to_crs("EPSG:2898")
# filter necessary columns
tract_detroit2020 = tract_detroit2020[['GEOID','geometry']]
# create function to drop geometry and merge with census tract data 2020 shapefile
def filtermerge2(variables2, outputs2):
    """Drop the empty geometry column from each table and merge it with the
    2020 census-tract shapefile (module-level ``tract_detroit2020``) on GEOID.

    Parameters
    ----------
    variables2 : list of DataFrame
        Tables carrying a ``GEOID`` column and a placeholder ``geometry`` column.
    outputs2 : list of str
        Output names, paired positionally with ``variables2``.

    Returns
    -------
    list of (name, GeoDataFrame) tuples; failed merges are reported and skipped.
    """
    dfs = []
    for variable, output in zip(variables2, outputs2):
        try:
            # Bug fix: drop without inplace=True so the caller's frame
            # is no longer mutated as a side effect.
            variable = variable.drop(columns=['geometry'])
            df = tract_detroit2020.merge(variable, on='GEOID')
            dfs.append((output, df))
        except Exception as e:
            # Bug fix: the old message interpolated the unrelated global
            # `variables` list and caught FileNotFoundError, which merge()
            # never raises.
            print(f"Error merging '{output}': {e}")
    return dfs
# list the 2022 tables to be merged and their output names
variables2 = [pop2022, race2022,
              income2022, employ2022]
outputs2 = ['pop2022_','race2022_',
            'income2022_','employ2022_']
dataframes2020 = filtermerge2(variables2, outputs2)
# Convert each tuple (name, DataFrame) from result into separate variables (individual DataFrames)
# NOTE: writing into globals() is a notebook convenience; prefer a dict in library code.
for output, df in dataframes2020:
    globals()[output] = df
# check the result
pop2022_.head(2)
# create function to change the column from object to integer
def convert_to_int(df, columns):
    """Coerce the given columns of *df* from object/str dtype to int64.

    Values that cannot be parsed as numbers (e.g. the literal string "NA")
    become 0 instead of raising, mirroring the manual "NA" -> 0 cleanup done
    elsewhere in this notebook for the income tables.

    Parameters
    ----------
    df : DataFrame
        Modified in place and also returned.
    columns : iterable of str
        Names of the columns to convert.

    Returns
    -------
    The same DataFrame with the listed columns cast to int64.
    """
    for column in columns:
        # Bug fix: to_numeric(errors='coerce') yields NaN for unparseable
        # values, and astype("int64") raises on NaN — fill with 0 first.
        df[column] = (pd.to_numeric(df[column], errors='coerce')
                      .fillna(0)
                      .astype("int64"))
    return df
# change population data into integer
pop2014_['Population'] = pop2014_['Population'].astype(int)
# change population data into integer
pop2022_['Population'] = pop2022_['Population'].astype(int)
# change each race column into integer
race2014_ = convert_to_int(race2014_,["pop_white0","pop_black0","pop_asian0",
                                      "pop_other0","pop_latino0"])
# change each race column into integer
race2022_ = convert_to_int(race2022_,["pop_white0","pop_black0","pop_asian0",
                                      "pop_other0","pop_latino0"])
# change the "NA" strings to 0 first, otherwise the integer cast would fail
income2014_['Median income (dollar)'] = income2014_['Median income (dollar)'].replace("NA",0)
# change each income column into integer
income2014_ = convert_to_int(income2014_,["Mean income (dollar)","Median income (dollar)"])
# change the "NA" strings to 0 first, otherwise the integer cast would fail
income2022_['Median income (dollar)'] = income2022_['Median income (dollar)'].replace("NA",0)
# change each income column into integer
income2022_ = convert_to_int(income2022_,["Mean income (dollar)","Median income (dollar)"])
# change each employment column into integer
employ2014_ = convert_to_int(employ2014_,["Employed","LaborForce","Unemployed"])
# change each employment column into integer
employ2022_ = convert_to_int(employ2022_,["Employed","LaborForce","Unemployed"])
Since the focus of this study is the SNF program, the locations of the SNF neighborhoods are also presented and compared with Detroit City as a whole.
# load SNF shapefile (Strategic Neighborhood Fund boundaries)
snf = gpd.read_file('data_raw/SNF.shp')
# change the projection to match other data
snf = snf.to_crs("EPSG:2898")
snf.head(2)
# check the visualization of SNF neighborhoods
# Plot the GeoDataFrame as hollow polygons with black outlines
ax = snf.plot(figsize=(10, 8), color='none', edgecolor='black')
# Add labels to the map: one text label at each neighborhood centroid
for idx, row in snf.iterrows():
    centroid = row.geometry.centroid
    ax.text(centroid.x, centroid.y, row['Proj_NAME'], fontsize=8, ha='center')
# Add background map using contextily (crs must match the data projection)
ctx.add_basemap(ax, crs='EPSG:2898', source=ctx.providers.CartoDB.Positron)
# Customize the plot (add title, legend, etc.)
plt.title('SNF Neighborhoods in Detroit City')
plt.box(False)
# NOTE(review): the axes are in EPSG:2898 units (feet), so these axis labels
# are nominal rather than true longitude/latitude.
plt.xlabel('Longitude')
plt.ylabel('Latitude')
# Save the map as an image file
plt.savefig('SNFneighborhoods.jpg')
# Show the plot
plt.show()
# inspect the data
pop2014_.head(2)
# report whether any tract has a zero population value
if (pop2014_['Population'] == 0).any().any():
    print("DataFrame contains zero values")
else:
    print("DataFrame does not contain zero values")
We can not remove rows with zero values in population because it will delete the census tract as well and affect the overall map.
# find the maximum value of population to find the scale
# to avoid bias, the value scale in the 2014 data and the 2022 data should be the same
pop2014_['Population'].max()
# Plot both GeoDataFrames on the same map
fig, ax = plt.subplots(figsize=(10, 8))
# Plot the first GeoDataFrame (population choropleth per tract)
# NOTE(review): legend expects a bool; the string "TRUE" only works because any
# non-empty string is truthy.
pop2014_.plot(ax=ax, column="Population", cmap='cividis', legend="TRUE", vmax=6000)
# Plot the second GeoDataFrame (SNF boundaries in red)
snf.plot(ax=ax, color='none', edgecolor='red', label='Proj_NAME', legend="TRUE")
# Add labels to the map
for idx, row in snf.iterrows():
    centroid = row.geometry.centroid
    ax.text(centroid.x, centroid.y, row['Proj_NAME'], fontsize=8, ha='center')
# add title to the map
plt.title('Population of Detroit City, 2014')
plt.xlabel('Longitude')
plt.ylabel('Latitude')
# Save the map as an image file
plt.savefig('pop2014.jpg')
# Show the map
plt.show()
# inspect the data
pop2022_.head(2)
# find the maximum value of population to find the scale
pop2022_['Population'].max()
# Plot both GeoDataFrames on the same map
fig, ax = plt.subplots(figsize=(10, 8))
# Plot the first GeoDataFrame (same vmax as the 2014 map so colors are comparable)
pop2022_.plot(ax=ax, column="Population", cmap='cividis',
              legend="TRUE",vmax=6000)
# Plot the second GeoDataFrame (SNF boundaries in red)
snf.plot(ax=ax, color='none', edgecolor='red', legend="TRUE")
# Add labels to the map
for idx, row in snf.iterrows():
    centroid = row.geometry.centroid
    ax.text(centroid.x, centroid.y, row['Proj_NAME'], fontsize=8, ha='center')
# add title to the map
plt.title('Population of Detroit City, 2022')
plt.xlabel('Longitude')
plt.ylabel('Latitude')
# Save the map as an image file
plt.savefig('pop2022.jpg')
# Show the map
plt.show()
# inspect the data
race2014_.head(2)
# find the maximum value of the column to find the scale
race2014_['pop_white0'].max()
# find the maximum value of the column to find the scale
race2014_['pop_black0'].max()
# The visualization represents the two largest population shares: the White and Black communities
# NOTE(review): unlike the other maps, these four race maps are never written
# to disk with plt.savefig — confirm whether that is intentional.
# Plot both GeoDataFrames on the same map
fig, ax = plt.subplots(figsize=(10, 8))
# Plot the first GeoDataFrame (White population, 2014; vmax shared with 2022)
race2014_.plot(ax=ax, column="pop_white0", cmap='OrRd', legend="TRUE", vmax=3000)
# Plot the second GeoDataFrame (SNF boundaries)
snf.plot(ax=ax, color='none', edgecolor='red', label='Proj_NAME', legend="TRUE")
# Add labels to the map
for idx, row in snf.iterrows():
    centroid = row.geometry.centroid
    ax.text(centroid.x, centroid.y, row['Proj_NAME'], fontsize=8, ha='center')
# add title to the map
plt.title('White Population of Detroit City, 2014')
plt.xlabel('Longitude')
plt.ylabel('Latitude')
# Show the map
plt.show()
# Plot both GeoDataFrames on the same map
fig, ax = plt.subplots(figsize=(10, 8))
# Plot the first GeoDataFrame (Black population, 2014; vmax shared with 2022)
race2014_.plot(ax=ax, column="pop_black0", cmap='OrRd', legend="TRUE", vmax=6000)
# Plot the second GeoDataFrame (SNF boundaries)
snf.plot(ax=ax, color='none', edgecolor='red', label='Proj_NAME', legend="TRUE")
# Add labels to the map
for idx, row in snf.iterrows():
    centroid = row.geometry.centroid
    ax.text(centroid.x, centroid.y, row['Proj_NAME'], fontsize=8, ha='center')
# add title to the map
plt.title('Black Population of Detroit City, 2014')
plt.xlabel('Longitude')
plt.ylabel('Latitude')
# Show the map
plt.show()
# find the maximum value of the column to find the scale
race2022_['pop_white0'].max()
# find the maximum value of the column to find the scale
race2022_['pop_black0'].max()
# Plot both GeoDataFrames on the same map
fig, ax = plt.subplots(figsize=(10, 8))
# Plot the first GeoDataFrame (White population, 2022)
race2022_.plot(ax=ax, column="pop_white0", cmap='OrRd', legend="TRUE", vmax=3000)
# Plot the second GeoDataFrame (SNF boundaries)
snf.plot(ax=ax, color='none', edgecolor='red', label='Proj_NAME', legend="TRUE")
# Add labels to the map
for idx, row in snf.iterrows():
    centroid = row.geometry.centroid
    ax.text(centroid.x, centroid.y, row['Proj_NAME'], fontsize=8, ha='center')
# add title to the map
plt.title('White Population of Detroit City, 2022')
plt.xlabel('Longitude')
plt.ylabel('Latitude')
# Show the map
plt.show()
# Plot both GeoDataFrames on the same map
fig, ax = plt.subplots(figsize=(10, 8))
# Plot the first GeoDataFrame (Black population, 2022)
race2022_.plot(ax=ax, column="pop_black0", cmap='OrRd', legend="TRUE", vmax=6000)
# Plot the second GeoDataFrame (SNF boundaries)
snf.plot(ax=ax, color='none', edgecolor='red', label='Proj_NAME', legend="TRUE")
# Add labels to the map
for idx, row in snf.iterrows():
    centroid = row.geometry.centroid
    ax.text(centroid.x, centroid.y, row['Proj_NAME'], fontsize=8, ha='center')
# add title to the map
plt.title('Black Population of Detroit City, 2022')
plt.xlabel('Longitude')
plt.ylabel('Latitude')
# Show the map
plt.show()
To identify the racial composition of each SNF neighborhood, a spatial join between the race data and the SNF data is conducted.
# overlay the two geodataframes (intersection keeps tract pieces inside SNF areas)
snf_race14 = gpd.overlay(snf,race2014_, how="intersection")
# check the result
snf_race14.head(2)
# aggregate each race population based on SNF neighborhood
# NOTE(review): each intersected piece carries the full tract value, so tracts
# that straddle an SNF boundary contribute their whole population — values are
# approximate rather than area-weighted.
snf_race14_ = snf_race14.groupby("Proj_NAME").agg({"pop_white0":'sum',
                                                   "pop_black0":'sum',
                                                   "pop_asian0":'sum',
                                                   "pop_latino0":'sum',
                                                   "pop_other0":'sum'}).reset_index()
# inspect the result
snf_race14_
# plot the data as a stacked bar chart
snf_race14_.plot(x="Proj_NAME",
                 kind="bar",
                 stacked=True,
                 ylim=(0,77000)) # set the scale to match the 2022 chart
label =["White","Black","Asian","Latino","Other"]
# Add labels and title
plt.xlabel('SNF Neighborhoods')
plt.ylabel('Population')
plt.title('Population by Race in 2014')
plt.legend(label)
# Save the data as an image file
plt.savefig('race2014.jpg')
# Show plot
plt.show()
# overlay the two geodataframes (intersection keeps tract pieces inside SNF areas)
snf_race22 = gpd.overlay(snf,race2022_, how="intersection")
# aggregate each race population based on SNF neighborhood
snf_race22_ = snf_race22.groupby("Proj_NAME").agg({"pop_white0":'sum',
                                                   "pop_black0":'sum',
                                                   "pop_asian0":'sum',
                                                   "pop_latino0":'sum',
                                                   "pop_other0":'sum'}).reset_index()
# plot the data as a stacked bar chart
snf_race22_.plot(x="Proj_NAME",
                 kind="bar",
                 stacked=True,
                 ylim=(0,77000)) # set the scale to match the 2014 chart
label =["White","Black","Asian","Latino","Other"]
# Add labels and title
plt.xlabel('SNF Neighborhood')
plt.ylabel('Population')
plt.title('Population by Race in 2022')
plt.legend(label)
# Save the data as an image file
plt.savefig('race2022.jpg')
# Show plot
plt.show()
# inspect the data
income2014_.head(2)
# find the scale for the choropleth
# to avoid bias, the value scale in the 2014 data and the 2022 data should be the same
# NOTE(review): the original comment mentions the maximum, but the code computes
# the minimum — confirm which statistic was intended.
income2014_['Median income (dollar)'].min()
# Plot both GeoDataFrames on the same map
fig, ax = plt.subplots(figsize=(10, 8))
# Plot the first GeoDataFrame (median income choropleth)
income2014_.plot(ax=ax, column="Median income (dollar)", cmap='Purples',
                 legend="TRUE", vmax=120000)
# Plot the second GeoDataFrame (SNF boundaries)
snf.plot(ax=ax, color='none', edgecolor='red', label='Proj_NAME', legend="TRUE")
# Add labels to the map
for idx, row in snf.iterrows():
    centroid = row.geometry.centroid
    ax.text(centroid.x, centroid.y, row['Proj_NAME'], fontsize=8, ha='center')
# add title to the map
plt.title('Median Household Income of Detroit City, 2014')
plt.xlabel('Longitude')
plt.ylabel('Latitude')
# Save the map as an image file
plt.savefig('income2014.jpg')
# Show the map
plt.show()
# inspect the data
income2022_.head(2)
# find the maximum value of the column to find the scale
income2022_['Median income (dollar)'].max()
Because of inflation, 1 dollar in 2014 is similar to 1.24 dollar in 2022. Therefore, with the previous map has the maximum value in 120,000, the maximum value set in 2022 data is 148,000.(https://www.in2013dollars.com/us/inflation/2014?endYear=2022&amount=1)
# Plot both GeoDataFrames on the same map
fig, ax = plt.subplots(figsize=(10, 8))
# Plot the first GeoDataFrame
# (vmax of 148,000 is the 2014 cap of 120,000 adjusted for inflation)
income2022_.plot(ax=ax, column="Median income (dollar)", cmap='Purples',
                 legend="TRUE", vmax=148000)
# Plot the second GeoDataFrame (SNF boundaries)
snf.plot(ax=ax, color='none', edgecolor='red', label='Proj_NAME', legend="TRUE")
# Add labels to the map
for idx, row in snf.iterrows():
    centroid = row.geometry.centroid
    ax.text(centroid.x, centroid.y, row['Proj_NAME'], fontsize=8, ha='center')
# add title to the map
plt.title('Median Household Income of Detroit City, 2022')
plt.xlabel('Longitude')
plt.ylabel('Latitude')
# Save the map as an image file
plt.savefig('income2022.jpg')
# Show the map
plt.show()
# inspect the data
employ2014_.head(2)
# calculate the unemployment rate (Unemployed / LaborForce)
employ2014_["unemployment_rate"] = employ2014_["Unemployed"] / employ2014_["LaborForce"]
# find the maximum value of the column to find the scale
# to avoid bias, the value scale in 2014 and 2022 should be the same
print(employ2014_['unemployment_rate'].max())
# Replace NaN with a default value of 0
# (NaN arises from 0/0 where a tract has an empty labor force)
employ2014_["unemployment_rate"] = np.nan_to_num(employ2014_["unemployment_rate"], nan=0)
print(employ2014_['unemployment_rate'].unique())
# Plot both GeoDataFrames on the same map
fig, ax = plt.subplots(figsize=(10, 8))
# Plot the first GeoDataFrame (unemployment-rate choropleth)
employ2014_.plot(ax=ax, column="unemployment_rate", cmap='Greens', legend="TRUE",
                 vmax=0.6)
# Plot the second GeoDataFrame (SNF boundaries)
snf.plot(ax=ax, color='none', edgecolor='red', label='Proj_NAME', legend="TRUE")
# Add labels to the map
for idx, row in snf.iterrows():
    centroid = row.geometry.centroid
    ax.text(centroid.x, centroid.y, row['Proj_NAME'], fontsize=8, ha='center')
# add title to the map
plt.title('Unemployment Rate of Detroit City, 2014')
plt.xlabel('Longitude')
plt.ylabel('Latitude')
# Save the map as an image file
plt.savefig('unemployment2014.jpg')
# Show the map
plt.show()
# inspect the data
employ2022_.head(2)
# calculate the unemployment rate (Unemployed / LaborForce)
employ2022_["unemployment_rate"] = employ2022_["Unemployed"] / employ2022_["LaborForce"]
# find the maximum value of the column to find the scale
employ2022_['unemployment_rate'].max()
# NOTE(review): unlike the 2014 data above, NaN values are not replaced here —
# confirm the 2022 table has no zero labor-force tracts.
# Plot both GeoDataFrames on the same map
fig, ax = plt.subplots(figsize=(10, 8))
# Plot the first GeoDataFrame (same vmax as 2014 so the maps are comparable)
employ2022_.plot(ax=ax, column="unemployment_rate", cmap='Greens', legend="TRUE",
                 vmax=0.6)
# Plot the second GeoDataFrame (SNF boundaries)
snf.plot(ax=ax, color='none', edgecolor='red', label='Proj_NAME', legend="TRUE")
# Add labels to the map
for idx, row in snf.iterrows():
    centroid = row.geometry.centroid
    ax.text(centroid.x, centroid.y, row['Proj_NAME'], fontsize=8, ha='center')
# add title to the map
plt.title('Unemployment Rate of Detroit City, 2022')
plt.xlabel('Longitude')
plt.ylabel('Latitude')
# Save the map as an image file
plt.savefig('unemployment2022.jpg')
# Show the map
plt.show()
# inspect the data
vacant2014.head(2)
# rename the ID column to match the other data
vacant2014 = vacant2014.rename(columns={'GEOID10': 'GEOID'})
# find the maximum value of the column to find the scale
# to avoid bias, the value scale in 2014 and 2022 should be the same
vacant2014['Pct_VeryLi'].max()
# Plot both GeoDataFrames on the same map
fig, ax = plt.subplots(figsize=(10, 8))
# Plot the first GeoDataFrame (percent very-likely-vacant per tract)
vacant2014.plot(ax=ax, column='Pct_VeryLi', cmap="Blues", legend="true",
                vmax=90)
# Plot the second GeoDataFrame (SNF boundaries)
snf.plot(ax=ax, color='none', edgecolor='red', label='Proj_NAME', legend="TRUE")
# Add labels to the map
for idx, row in snf.iterrows():
    centroid = row.geometry.centroid
    ax.text(centroid.x, centroid.y, row['Proj_NAME'], fontsize=8, ha='center')
# add title to the map
plt.title('Vacancy Index of Detroit City in 2014')
plt.xlabel('Longitude')
plt.ylabel('Latitude')
# Save the map as an image file
plt.savefig('vacancy2014.jpg')
# Show the map
plt.show()
# inspect the data
vacant2023.head(5)
# filter vacant data based on vacancy status (keep only vacant property types)
filtered_vacant23 = vacant2023[vacant2023['property_1'].isin(['COMMERCIAL VACANT',
                                                              'INDUSTRIAL VACANT',
                                                              'RESIDENTIAL VACANT'])]
# check the result
filtered_vacant23.head(2)
# Plot both GeoDataFrames on the same map
fig, ax = plt.subplots(figsize=(10, 8))
# Plot the first GeoDataFrame (vacant parcels)
filtered_vacant23.plot(ax=ax, color='blue', legend='true')
# Plot the second GeoDataFrame (SNF boundaries)
snf.plot(ax=ax, color='none', edgecolor='red', label='Proj_NAME', legend="TRUE")
# Add labels to the map
for idx, row in snf.iterrows():
    centroid = row.geometry.centroid
    ax.text(centroid.x, centroid.y, row['Proj_NAME'], fontsize=8, ha='center')
# add title to the map
# Bug fix: this map shows the 2023 parcel data, not 2014 (the 2014 vacancy
# map is produced separately above).
plt.title('Vacant Buildings of Detroit City in 2023')
plt.xlabel('Longitude')
plt.ylabel('Latitude')
# Show the map
plt.show()
This map can not be compared with the vacancy index map in 2014. Therefore, some analysis should be taken to calculate the vacancy index in 2023. The general formula for vacancy index is total vacant buildings divided by total buildings in the same area which is census tract level in this case.
# combine vacant 2023 data with census tract 2020 data
vacant2023_ = gpd.overlay(tract_detroit2020,vacant2023, how="intersection")
# calculate the total building structures for each census tract (GEOID)
# and assign the per-tract count to a new column
vacant2023_['totalstr'] = vacant2023_.groupby('GEOID')['property_1'].transform('count')
# inspect the result
vacant2023_
# extract only the total-building information and GEOID
# ("first" is safe because totalstr is constant within each GEOID group)
vacant2023__ = vacant2023_.groupby("GEOID").agg({"totalstr":"first"}).reset_index()
# inspect the result
vacant2023__
# combine the filtered vacant 2023 data with census tract 2020 data
filtered_vacant23_ = gpd.overlay(tract_detroit2020,filtered_vacant23, how="intersection")
# calculate the total vacant buildings for each census tract (GEOID)
filtered_vacant23_['totalvacant'] = filtered_vacant23_.groupby('GEOID')[
    'property_1'].transform('count')
# inspect the result
filtered_vacant23_
# extract only the vacant-building information and GEOID
filtered_vacant23__ = filtered_vacant23_.groupby("GEOID").agg({"totalvacant":"first"}).reset_index()
# inspect the result
filtered_vacant23__
The difference in row counts between the total-building table and the total-vacant-building table indicates that some census tracts have no vacant buildings.
# join the total buildings, total vacant, and GEOID
# (inner join drops tracts that have no vacant buildings at all)
vacant23_final = filtered_vacant23__.merge(vacant2023__, on='GEOID', how='inner')
# calculate the vacancy index (percent of structures that are vacant) in a new column
vacant23_final['index'] = (vacant23_final['totalvacant']/vacant23_final['totalstr'])*100
# inspect the result
vacant23_final
# ensure the data type
print(vacant23_final['index'].dtype)
# find the maximum value of the column to find the scale
# to avoid bias, the value scale in 2014 and 2022 should be the same
print(vacant23_final['index'].max())
# join vacant 2023 final data with census tract 2020 data to get the geometry
vacant23_final_ = tract_detroit2020.merge(vacant23_final, on='GEOID', how='inner')
# inspect the result
vacant23_final_
# Plot both GeoDataFrames on the same map
fig, ax = plt.subplots(figsize=(10, 8))
# Plot the first GeoDataFrame
# (vmax matches the 2014 vacancy map so the two color scales are comparable)
vacant23_final_.plot(ax=ax, column='index', cmap='Blues', legend='true',
                     vmax=90)
# Plot the second GeoDataFrame (SNF boundaries)
snf.plot(ax=ax, color='none', edgecolor='red', legend="TRUE")
# Add labels to the map
for idx, row in snf.iterrows():
    centroid = row.geometry.centroid
    ax.text(centroid.x, centroid.y, row['Proj_NAME'], fontsize=8, ha='center')
# add title to the map
# NOTE(review): the underlying parcel data is from 2023 although the title says 2022.
plt.title('Vacancy Index of Detroit City in 2022')
plt.xlabel('Longitude')
plt.ylabel('Latitude')
# Save the map as an image file
plt.savefig('vacancy2022.jpg')
# Show the map
plt.show()
Regression analysis was performed as an additional analysis to identify the relationships between the variables. The results provide more understanding of urban decay in Detroit, not just through individual variables but also through the relationships between them.
# combine all the variable data (chained GEOID merges) and drop geometry
# columns to simplify the regression calculation; geometry_x/geometry_y are
# the suffixed duplicates pandas creates when merged frames share column names
merged2014 = vacant2014.merge(pop2014_,on="GEOID").merge(
    race2014_,on="GEOID").merge(
    income2014_,on="GEOID").merge(
    employ2014_,on="GEOID").drop(
    columns="geometry").drop(
    columns="geometry_x").drop(
    columns="geometry_y")
# inspect the result
merged2014.head(2)
# Check for NaN or infinity values before running the regressions.
# Bug fix: DataFrame has no .isfinite() method (the original line raised
# AttributeError); test finiteness on the numeric columns with numpy instead.
numeric_cols = merged2014.select_dtypes(include=[np.number])
if merged2014.isnull().values.any() or not np.isfinite(numeric_cols).all().all():
    # Handle NaN or infinity values
    merged2014.fillna(0, inplace=True)  # Replace NaN with 0
    merged2014 = merged2014.replace([np.inf, -np.inf], 0)  # Replace infinity with 0
def perform_multiple_regression(data, column_names):
    """Fit a simple linear regression of each listed column on 'Pct_VeryLi'
    (the 2014 vacancy percentage) and collect slope and R-squared per column.

    Bug fix: the parameter was previously named ``column_names_`` while the
    loop silently iterated the global ``column_names``; the function now uses
    its own argument.

    Parameters
    ----------
    data : DataFrame
        Must contain a 'Pct_VeryLi' column (the predictor) and each response
        column named in ``column_names``.
    column_names : iterable of str
        Response columns, fitted one at a time.

    Returns
    -------
    dict mapping column name -> {'coefficients': ndarray, 'r_squared': float}.
    """
    results = {}
    for column_name in column_names:
        x = data['Pct_VeryLi'].values.reshape((-1, 1))
        y = data[column_name]
        model = LinearRegression()
        model.fit(x, y)
        r_squared = model.score(x, y)
        results[column_name] = {'coefficients': model.coef_,
                                'r_squared': r_squared}
    return results
# list the response columns for the regressions (shared by both years)
column_names = ['Population',
                'pop_white0',
                'pop_black0',
                'Median income (dollar)',
                'unemployment_rate']
results = perform_multiple_regression(merged2014, column_names)
# Print the results
for column_name, result in results.items():
    print(f"Regression results for {column_name}:")
    print("Coefficients:", result['coefficients'])
    print("R-squared:", result['r_squared'])
    print()
# combine all the variable data (chained GEOID merges) and drop geometry
# columns to simplify the regression calculation
merged2022 = vacant23_final_.merge(pop2022_,on="GEOID").merge(
    race2022_,on="GEOID").merge(
    income2022_,on="GEOID").merge(
    employ2022_,on="GEOID").drop(
    columns="geometry").drop(
    columns="geometry_x").drop(
    columns="geometry_y")
# inspect the result
merged2022.head(2)
def perform_multiple_regression2(data, column_names):
    """Fit a simple linear regression of each listed column on the 2023
    vacancy index ('index' column) and collect slope and R-squared per column.

    Returns a dict mapping column name -> {'coefficients', 'r_squared'}.
    """
    # The predictor is the same for every response, so build it once.
    predictor = data['index'].values.reshape((-1, 1))
    results = {}
    for name in column_names:
        response = data[name]
        fitted = LinearRegression().fit(predictor, response)
        results[name] = {
            'coefficients': fitted.coef_,
            'r_squared': fitted.score(predictor, response),
        }
    return results
# use the regression function on the merged 2022 data (same response columns)
results = perform_multiple_regression2(merged2022, column_names)
# Print the results
for column_name, result in results.items():
    print(f"Regression results for {column_name}:")
    print("Coefficients:", result['coefficients'])
    print("R-squared:", result['r_squared'])
    print()
Changes are found in the demographic, economic, and physical aspects of Detroit City as well as in the SNF neighborhoods. Total population has relatively decreased across the entire city, but some SNF neighborhoods — Warrendale, Livernois, and Campau — exhibit a slight increase. In racial composition, Black residents appear to dominate, and the increase in Black communities occurs in nearly all neighborhoods, while White residents tend to decrease except in Campau, Islandview, and Livernois. The median household income is increasing in nearly the whole of Detroit City. Of the ten SNF neighborhoods, the most significant increase can be found in the Jefferson neighborhood.
Conversely, the unemployment rate appeared to decrease in 2022 where a lower unemployment rate indicates that fewer people are without jobs. No particular neighborhood shows a noticeable or significant change in this variable. In physical aspect, the vacancy index has significantly increased from 2014 to 2022 in entire parts of Detroit City. The most visible rise can be found in the center and east parts of Detroit where more than 50% of their structures are vacant. Islandview and Jefferson's neighborhood are two noticeable places in this variable.
The additional analysis, regression, showed that all variables have lower significance to building vacancy. However, the direction of the relationship between population, white race, black race, and median household income is found to be negative with building vacancy, while the unemployment rate has a positive relationship.